package com.example.demo.pojo;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DocumentEntry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Utility that downloads a CSDN blog article, inlines its stylesheets and
 * stores the article body either as a standalone HTML file or as a
 * Word-readable {@code .doc} (OLE2) file.
 *
 * Created by admin on 2019/1/15.
 */
public class JsoupTest {

    /**
     * Downloads the page at {@code uriStr} and builds a self-contained HTML
     * document from it: the page's inline {@code <style>} elements, the content
     * of every linked stylesheet (embedded as {@code <style>}), a heading that
     * shows the source address, and the {@code .blog-content-box} element.
     *
     * <p>Failures are best-effort: a stylesheet that cannot be downloaded is
     * skipped, and if the page itself cannot be fetched the stack trace is
     * printed and whatever was assembled so far is returned.
     *
     * @param uriStr absolute address of the article page
     * @return the assembled HTML (possibly incomplete when the download failed)
     */
    private static String readHtml(String uriStr){
        // Local, single-threaded buffer: StringBuilder avoids StringBuffer's locking.
        StringBuilder sb = new StringBuilder();
        sb.append("<html><head>");
        try {
            URI uri = new URI(uriStr);
            Document doc = Jsoup.parse(uri.toURL(), 10000);
            sb.append(doc.select("style").outerHtml());
            Elements stylesheets = doc.select("link[rel=\"stylesheet\"]");
            for (Element stylesheet : stylesheets) {
                // Resolve against the page's base URI so relative and
                // protocol-relative hrefs ("//host/x.css") can be fetched.
                String url = stylesheet.absUrl("href");
                if (url.isEmpty()) {
                    continue;
                }
                String css;
                try {
                    css = IOUtils.toString(new URI(url), "UTF-8");
                } catch (IOException | URISyntaxException | IllegalArgumentException e) {
                    // One unreachable stylesheet must not discard the whole article.
                    System.err.println("Skipping stylesheet " + url + ": " + e);
                    continue;
                }
                // Append only after a successful download so no <style> tag is left open.
                sb.append("<style type=\"text/css\" url='");
                sb.append(url);
                sb.append("'>");
                sb.append(css);
                sb.append("</style>");
                sb.append("\r\n");
            }
            sb.append("<style type=\"text/css\">");
            sb.append("#article_content,.fontclass{font-family:\"Microsoft YaHei\"}");
            sb.append("</style>");
            sb.append("</head><body>");
            sb.append("<h1 class='title-article'>下载地址:<b>").append(uriStr).append("</b></h1>");
            doc.select("#article_content p").toggleClass("fontclass");
            sb.append(doc.select(".blog-content-box").outerHtml());
            sb.append("</body></html>");
        } catch (Exception e) {
            // Boundary catch: callers of this helper expect a string, never an exception.
            e.printStackTrace();
        }
        return sb.toString();
    }

    /**
     * Writes {@code html} into {@code file} as an OLE2 compound document whose
     * single {@code "WordDocument"} entry holds the encoded HTML bytes.
     * Missing parent directories are created first.
     *
     * @param file     destination file
     * @param html     HTML content to store
     * @param encoding charset name used to encode {@code html}
     * @return {@code true} if the file was written, {@code false} if an
     *         {@link IOException} occurred (its stack trace is printed)
     */
    public static boolean writeDocFile( File file, String html,String encoding) {
        // getParentFile() is null for a bare file name such as "out.doc".
        File parentDir = file.getParentFile();
        if (parentDir != null && !parentDir.exists()) {
            parentDir.mkdirs();
        }
        try {
            byte[] content = html.getBytes(encoding);
            POIFSFileSystem poifs = new POIFSFileSystem();
            DirectoryEntry root = poifs.getRoot();
            // try-with-resources closes both streams even when writing fails.
            try (ByteArrayInputStream in = new ByteArrayInputStream(content);
                 FileOutputStream out = new FileOutputStream(file)) {
                root.createDocument("WordDocument", in);
                poifs.writeFilesystem(out);
            }
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Extracts the article body from a CSDN page and saves it as a Word file.
     *
     * @param uriStr   absolute address of the article page
     * @param wordFile destination {@code .doc} file
     */
    public static void writeCSDNWordFile(String uriStr,File wordFile)  {
        writeDocFile(wordFile,readHtml(uriStr),"UTF-8");
    }

    /**
     * Extracts the article body from a CSDN page and saves it as a UTF-8 HTML file.
     *
     * @param uriStr    absolute address of the article page
     * @param localFile destination HTML file
     * @throws IOException if the HTML file cannot be written
     */
    public static void writeCSDNHtmlFile(String uriStr,File localFile) throws IOException {
        FileUtils.writeStringToFile(localFile,readHtml(uriStr),"UTF-8");
    }

    /**
     * Demo entry point: saves one sample article as both HTML and Word files.
     */
    public static void main(String[] args) throws IOException, URISyntaxException {
        String html="F:/test/word/jxl-excel.html";
        String doc="F:/test/word/jxl-excel.doc";
        String uri="https://blog.csdn.net/qq_41617848/article/details/107619810";
        writeCSDNHtmlFile(uri,new File(html));
        writeCSDNWordFile(uri,new File(doc));
    }
}
